from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from eval_prompt import *
import json, ast, os, time
import PyPDF2
from Evaluator import ensure_dir, pdf_to_text
import asyncio, subprocess

# Base URL handed to the chat client below (a server listening on localhost:8001).
API_URL = 'http://localhost:8001/v1'
# Model identifier sent with every request; here it is a filesystem path to a GGUF file.
model_name = "/data/server_cache/Qwen/Qwen3-32B/ggml-model-f16.gguf"

# Module-level chat client shared by evaluator().
# NOTE(review): 'EMPTY' looks like a placeholder key for a local server that
# does not check credentials -- confirm against the server setup.
LLM_client = ChatOpenAI(model_name=model_name, base_url=API_URL, api_key='EMPTY')

# Sources to evaluate, as 2-tuples. run_evaluations() only reads the first
# element, using it as the sub-folder name for both the generated models and
# the evaluation output; the second element is currently ignored there.
EVAL_SOURCES = [
    ("Baseline",   "Baseline"),          # adjust names if needed
    ("Claude",     "Claude"),
    ("Gemini",     "Gemini"),
    ("DeepSeek",   "DeepSeek"),
    ("Qwen",       "Qwen"),
]

# Maximum number of retry passes run_evaluations() makes over failed evaluations.
MAX_RETRIES = 3
# Seconds slept (time.sleep) before each individual retry.
RETRY_DELAY = 3

async def execute_nusmv(program_pth):
    """Run the ``NuSMV`` executable on a model file and return its console output.

    Args:
        program_pth: Path of the model file, passed to ``NuSMV`` as its only
            command-line argument.

    Returns:
        The decoded stdout of the process. When the exit code is non-zero,
        the decoded stderr is appended after a newline separator.

    Raises:
        ValueError: If collecting the output of the started process fails.
            The original exception is attached as ``__cause__``.
        OSError: Propagated unchanged from process creation, e.g.
            ``FileNotFoundError`` when the ``NuSMV`` executable is not on PATH.
    """
    process = await asyncio.create_subprocess_exec(
        'NuSMV',
        program_pth,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    try:
        raw_out, raw_err = await process.communicate()
    except Exception as e:
        # Output collection failed; make sure the child does not outlive us.
        if process.returncode is None:
            try:
                process.kill()
            except ProcessLookupError:
                # The process exited between the check and the kill.
                pass
            await process.wait()
        raise ValueError(f"The input code file at provided program path='{program_pth}' did not execute successfully") from e

    # Decoding happens only after a successful communicate(), so neither
    # stream can be referenced while unbound.
    outputs = [raw_out.decode('utf-8', errors='ignore') if raw_out else ""]
    if process.returncode != 0:
        # A failing run reports its diagnostics on stderr; include them too.
        outputs.append(raw_err.decode('utf-8', errors='ignore') if raw_err else "")
    return "\n".join(outputs)

def evaluator(expert:str, generated:str, pth: str, generated_pth:str):
    """Ask the LLM to compare a generated SMV model against the expert one.

    Args:
        expert: Text of the expert-written SMV model.
        generated: Text of the generated SMV model.
        pth: Path of the PDF handed to ``pdf_to_text`` to obtain the SOP text.
        generated_pth: Path of the generated SMV file, executed with NuSMV so
            that its console output can be included in the prompt.

    Returns:
        Whatever the prompt-to-model chain's ``invoke`` returns; callers in
        this file read its ``.content`` attribute.
    """
    sop_text = pdf_to_text(pth)
    # execute_nusmv is a coroutine; drive it to completion synchronously.
    nusmv_log = asyncio.run(execute_nusmv(generated_pth))

    # NOTE(review): the two prompt constants presumably come from the
    # star-import of eval_prompt -- confirm.
    message_templates = [
        ("system", VERIFIABILITY_SYS_PROMPT),
        ("human", VERIFIABILITY_PROMPT),
    ]
    chain = ChatPromptTemplate.from_messages(message_templates) | LLM_client

    prompt_inputs = {
        'SOP_TXT': sop_text,
        'expert_smv': expert,
        'agent_smv': generated,
        'cli_output': nusmv_log,
    }
    return chain.invoke(prompt_inputs)

def _parse_json_reply(reply_text):
    """Return the JSON value embedded in an LLM reply.

    The value is read starting right after the first ```` ```json ```` fence
    when one is present, otherwise from the start of the reply. Anything that
    follows the JSON value (closing fence, trailing commentary) is ignored.

    Raises:
        json.JSONDecodeError: If no JSON value starts at that position.
    """
    _, fence, after_fence = reply_text.partition("```json")
    candidate = (after_fence if fence else reply_text).lstrip()
    # raw_decode parses one value and tolerates trailing text, unlike loads().
    parsed, _ = json.JSONDecoder().raw_decode(candidate)
    return parsed


def _read_text(path):
    """Return the full text of *path*, closing the file afterwards."""
    with open(path, 'r') as fh:
        return fh.read()


def _evaluate_and_save(expert_file, generated_file, sop_file, out_file, echo=False):
    """Evaluate one generated model and write the JSON verdict to *out_file*.

    The reply is parsed before anything is written, and the result is moved
    into place with ``os.replace``. *out_file* therefore only ever exists
    with complete content, which matters because its existence is what marks
    a model as already evaluated.
    """
    response = evaluator(
        _read_text(expert_file),
        _read_text(generated_file),
        sop_file,
        generated_file,
    )
    if echo:
        print(response.content)
    serialized = json.dumps(_parse_json_reply(response.content), indent=4)
    if echo:
        print(serialized)

    tmp_file = out_file + ".tmp"
    with open(tmp_file, 'w') as fw:
        fw.write(serialized)
    os.replace(tmp_file, out_file)


def run_evaluations():
    """Evaluate models 01-10 of every source in EVAL_SOURCES, retrying failures.

    For each source folder and model number the expert model, the generated
    model and the SOP PDF are located under ``/home/SpecMAS``. Models whose
    output file already exists, or whose inputs are missing, are skipped.
    Failed evaluations are retried for up to MAX_RETRIES passes, sleeping
    RETRY_DELAY seconds before each retry. A summary is printed at the end.
    """
    parent_dir = '/home/SpecMAS'
    eval_root = os.path.join(parent_dir, 'Eval_files')
    expert_dir = os.path.join(eval_root, 'Expert')
    sop_root = parent_dir
    out_root = os.path.join(parent_dir, 'Eval_out', 'Evaluations_v2_run_2')

    def paths_for(src_folder, model_num):
        """Return (expert_file, generated_file, sop_file, out_file)."""
        return (
            os.path.join(expert_dir, f"model_{model_num}.smv"),
            os.path.join(eval_root, src_folder, f"model_{model_num}.smv"),
            os.path.join(sop_root, f"sop_{model_num}.pdf"),
            os.path.join(out_root, src_folder, f"eval_model_{model_num}.json"),
        )

    failures = []  # list of (source, model_num)

    # First pass
    for src_folder, _ in EVAL_SOURCES:
        ensure_dir(os.path.join(out_root, src_folder))

        for i in range(1, 11):
            model_num = f"{i:02d}"
            expert_file, generated_file, sop_file, out_file = paths_for(src_folder, model_num)

            if os.path.exists(out_file):
                print(f"⏭️  Skipping {src_folder}/model_{model_num} (already evaluated)")
                continue

            # skip missing files
            inputs = (expert_file, generated_file, sop_file)
            if not all(os.path.exists(p) for p in inputs):
                print(f"❌ Missing files for {src_folder}/{model_num}, skipping")
                continue

            print(f"➡️  Evaluating {src_folder} model_{model_num}...")
            try:
                _evaluate_and_save(expert_file, generated_file, sop_file, out_file, echo=True)
            except Exception as e:
                # Batch boundary: record the failure and keep going.
                print(f"❌ Evaluation failed for {src_folder}/{model_num}: {e}")
                failures.append((src_folder, model_num))
            else:
                print(f"✅ Saved evaluation to {out_file}")

    # Retry logic
    if failures:
        print(f"\n🔄 Retrying {len(failures)} failed evaluations (up to {MAX_RETRIES} times)...")
    for attempt in range(1, MAX_RETRIES + 1):
        if not failures:
            break
        print(f"\n🔁 Retry attempt {attempt}")
        new_failures = []
        for src_folder, model_num in failures:
            ensure_dir(os.path.join(out_root, src_folder))
            expert_file, generated_file, sop_file, out_file = paths_for(src_folder, model_num)

            if os.path.exists(out_file):
                print(f"⏭️  Skipping {src_folder}/model_{model_num} (already evaluated)")
                continue
            try:
                time.sleep(RETRY_DELAY)
                print(f"➡️  Retrying {src_folder} model_{model_num}...")
                _evaluate_and_save(expert_file, generated_file, sop_file, out_file)
            except Exception as e:
                print(f"❌ Still failing for {src_folder}/{model_num}: {e}")
                new_failures.append((src_folder, model_num))
            else:
                print(f"✅ Saved evaluation to {out_file}")

        failures = new_failures

    # Summary
    if not failures:
        print("\nAll evaluations completed successfully!")
    else:
        print(f"\n❌ Persisting failures for {len(failures)} models:")
        for src, num in failures:
            print(f"   • {src}/model_{num}")

if __name__ == "__main__":
    run_evaluations()